In [1]:
# Required Packages
import pandas as pd
import numpy as np

# Modeling
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# preprocessing
from sklearn.preprocessing import StandardScaler

# keras
# NOTE(review): these are standalone-Keras (TF1-era) import paths; in modern
# Keras, plot_model lives at keras.utils.plot_model and vis_utils is gone.
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD, Adagrad
from keras.utils.vis_utils import plot_model
import keras.backend as K

# Visualisation libraries

## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex

## progress bar
import progressbar

## seaborn
import seaborn as sns
sns.set_context('paper', rc={'font.size':12,'axes.titlesize':14,'axes.labelsize':12})
sns.set_style('white')

## matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Polygon
import matplotlib.gridspec as gridspec
import matplotlib.colors
from pylab import rcParams
# NOTE(review): the 'seaborn-whitegrid' style name was removed in matplotlib 3.6
# (renamed 'seaborn-v0_8-whitegrid'); this line requires an older matplotlib.
plt.style.use('seaborn-whitegrid')
plt.rcParams['figure.figsize'] = 14, 8
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams['text.color'] = 'k'
%matplotlib inline

## plotly
from plotly.offline import init_notebook_mode, iplot 
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
%config InlineBackend.figure_format = 'retina' 

## missingno
import missingno as msno

# Silence the deprecation noise from the older library versions used here.
import warnings
warnings.filterwarnings("ignore")
Using TensorFlow backend.

Starbucks Offer Personalizations

In this article, we investigate a simulated dataset that mimics customer behavior on the Starbucks rewards mobile app. Starbucks tends to send out offers to users of the mobile app once every few days. These offers are targeted — not all users receive the same offer. An offer can contain a discount for Starbucks products or sometimes a BOGO (buy one, get one free) deal. Each offer has a validity period before it expires. The article here is inspired by a towardsdatascience.com article.

Loading the Datasets

In [2]:
def Line(N): return N*'='
def Header(Inp, Length = 120):
    """Print a colored section header: `Inp` highlighted, padded with '=' to `Length`."""
    title = Back.BLACK + Fore.CYAN + Style.NORMAL + '%s' % Inp + Style.RESET_ALL
    rule = Fore.BLUE + Style.NORMAL + ' %s' % Line(Length - len(Inp) - 1) + Style.RESET_ALL
    print(title + rule)
def Bottom(Length = 120):
    """Print a closing horizontal rule of `Length` '=' characters in blue."""
    rule = Line(Length)
    print(Fore.BLUE + Style.NORMAL + '%s' % rule + Style.RESET_ALL)
    
# Load the cleaned Starbucks datasets and preview the first rows of each.
# NOTE(review): Styler.hide_index() is deprecated in newer pandas
# (use .hide(axis='index')) — this cell only runs on older versions.

# Portfolio Dataset: one row per offer (reward, difficulty, duration, type, channels)
Header('Portfolio Dataset:')
Portfolio = pd.read_csv('StarBucks/Portfolio_Clean.csv')
display(Portfolio.head().style.hide_index())

# Profile Dataset: one row per app user (gender, age, income, membership tenure)
Header('Profile Dataset:')
Profile = pd.read_csv('StarBucks/Profile_Clean.csv')
display(Profile.head().style.hide_index())

# Transcript Dataset: event log (offers received/viewed/completed, transactions)
Header('Transcript Dataset:')
Transcript = pd.read_csv('StarBucks/Transcript_Clean.csv')
display(Transcript.head().style.hide_index())
Bottom()

# Pre-aggregated per-user features and the merged dataset used later for modeling
User_Data = pd.read_csv('StarBucks/User_Data.csv')
Data = pd.read_csv('StarBucks/Data.csv')
Portfolio Dataset: =====================================================================================================
Reward Difficulty Duration Offer_Type Offer_ID Email Mobile Social Web
10 10 7 BOGO ae264e3637204a6fb9bb56bc8210ddfd 1 1 1 0
10 10 5 BOGO 4d5c57ea9a6940dd891ad53e9dbe8da0 1 1 1 1
0 0 4 Informational 3f207df678b143eea3cee63160fa8bed 1 1 0 1
5 5 7 BOGO 9b98b8c7a33c4b65b9aebfe6a799e6d9 1 1 0 1
5 20 10 Discount 0b1e1539f2cc45b7b9fa7c272da2e1d7 1 0 0 1
Profile Dataset: =======================================================================================================
Gender Age ID Became_Member_On Income Member_Since_Year Member_Tenure
Other 55.000000 68be06ca386d4c31939f3a4f0e3dd783 2017-02-12 64000.000000 2017 23.000000
Female 55.000000 0610b486422d4921ae7d2bf64640c50b 2017-07-15 112000.000000 2017 18.000000
Other 55.000000 38fe809add3b4fcf9315a9694bb96ff5 2018-07-12 64000.000000 2018 6.000000
Female 75.000000 78afa995795e4d85b5d9ceeca43f5fef 2017-05-09 100000.000000 2017 20.000000
Other 55.000000 a03223e636434f42ac4c3df47e8bac43 2017-08-04 64000.000000 2017 17.000000
Transcript Dataset: ====================================================================================================
Person Event Value Time Amount Reward Offer_ID
78afa995795e4d85b5d9ceeca43f5fef Offer Received {'offer id': '9b98b8c7a33c4b65b9aebfe6a799e6d9'} 0 nan nan 9b98b8c7a33c4b65b9aebfe6a799e6d9
a03223e636434f42ac4c3df47e8bac43 Offer Received {'offer id': '0b1e1539f2cc45b7b9fa7c272da2e1d7'} 0 nan nan 0b1e1539f2cc45b7b9fa7c272da2e1d7
e2127556f4f64592b11af22de27a7932 Offer Received {'offer id': '2906b810c7d4411798c6938adc9daaa5'} 0 nan nan 2906b810c7d4411798c6938adc9daaa5
8ec6ce2a7e7949b1bf142def7d0e0586 Offer Received {'offer id': 'fafdcd668e3743c1bb461111dcafc2a4'} 0 nan nan fafdcd668e3743c1bb461111dcafc2a4
68617ca6246f4fbc85e91a2a49552598 Offer Received {'offer id': '4d5c57ea9a6940dd891ad53e9dbe8da0'} 0 nan nan 4d5c57ea9a6940dd891ad53e9dbe8da0
========================================================================================================================

Modeling

The objective of this exercise is to determine the best offer type for a given user. This calls for a classification method that also provides a probability for each class. Here we build a multi-label neural-network classifier with the Keras Sequential API, which outputs a probability for each offer type.

In [3]:
# Drop the completion counts and other columns that are not used as model features.
User_Data = User_Data.drop(columns=['No_Offer', 'BOGO_comp', 'Info_comp',
                                    'Disc_comp', 'Tot_Rewards_Rec', 'Offer_Difficulty'])
# Multi-label target columns mapped to human-readable names.
Target = {'BOGO_offer': 'BOGO Offers',
          'Disc_offer': 'Discount Offers',
          'Info_offer': 'Informational Offers'}

Correlation

Before modeling, let's look at the variance of our dataset features.

In [4]:
# Per-feature variance (target columns excluded), largest first. Income dominates
# by several orders of magnitude, which motivates the StandardScaler step below.
# NOTE(review): Styler.set_precision() is deprecated in newer pandas
# (use .format(precision=...)).
display(User_Data.drop(columns = list(Target.keys())).var().sort_values(ascending = False).to_frame(name= 'Variance')\
        .style.background_gradient(cmap='OrRd').set_precision(2))
Variance
Income 407020446.74
Tot_Tran_Amnt 15856.51
Offer_Trans_Amnt 3756.03
Tran_Amnt_per_Offer 647.57
Age 261.47
Ave_Tran_Amnt 255.95
Member_Tenure 182.78
Tot_Tran_Cnt 26.18
Reward_per_Offer 20.71
Difficulty_per_Offer 18.05
Offer_View 2.99
Offer Completed 2.03
Disc_Offer_Rec 1.28
BOGO_Offer_Rec 1.26
Info_Offer_Rec 0.77
Gender_Male 0.25
Gender_Female 0.23
Gender_Other 0.12
Offer_Comp_View_Ratio 0.11
Offer_Comp_Rec_Ratio 0.09
Offer_Trans_Amnt_Ratio 0.08
Offer_Tran_Cnt_Ratio 0.06

Next, we standardize the features by removing the mean and scaling to unit variance. In a separate article, we demonstrated the benefits of scaling data using StandardScaler().

In [5]:
# Standardize every feature column to zero mean / unit variance
# (target columns are excluded from scaling).
Temp = [col for col in User_Data.columns if col not in Target]

scaler = StandardScaler()
User_Data[Temp] = scaler.fit_transform(User_Data[Temp])

# Sanity check: after scaling, each feature's variance should be ~1.
Temp = User_Data[Temp].var().sort_values(ascending=False).to_frame(name='Variance')
display(Temp.style.background_gradient(cmap=sns.light_palette("green", as_cmap=True)).set_precision(2))
Variance
Offer_Comp_Rec_Ratio 1.00
Offer Completed 1.00
Disc_Offer_Rec 1.00
Gender_Other 1.00
Gender_Female 1.00
Offer_View 1.00
Tot_Tran_Cnt 1.00
BOGO_Offer_Rec 1.00
Ave_Tran_Amnt 1.00
Offer_Comp_View_Ratio 1.00
Age 1.00
Tot_Tran_Amnt 1.00
Member_Tenure 1.00
Income 1.00
Offer_Tran_Cnt_Ratio 1.00
Offer_Trans_Amnt 1.00
Reward_per_Offer 1.00
Tran_Amnt_per_Offer 1.00
Offer_Trans_Amnt_Ratio 1.00
Difficulty_per_Offer 1.00
Gender_Male 1.00
Info_Offer_Rec 1.00
In [6]:
def Correlation_Plot(Df, Fig_Size):
    """Plot the lower triangle of Df's correlation matrix as an annotated heatmap.

    Parameters
    ----------
    Df : pd.DataFrame
        Numeric data whose pairwise correlations are plotted.
    Fig_Size : int or float
        Width and height of the (square) matplotlib figure, in inches.
    """
    Correlation_Matrix = Df.corr().round(2)
    # Boolean mask hiding the strict upper triangle; the diagonal stays visible.
    # (The original float mask from np.zeros_like breaks on newer seaborn,
    # which requires a boolean mask array.)
    mask = np.triu(np.ones_like(Correlation_Matrix, dtype=bool), k=1)
    Fig, ax = plt.subplots(figsize=(Fig_Size, Fig_Size))
    sns.heatmap(Correlation_Matrix, ax=ax, mask=mask, annot=True, square=True,
                cmap=sns.color_palette("Greens", n_colors=10), linewidths=0.2,
                vmin=0, vmax=1, cbar_kws={"shrink": .6})

# Map raw column names to human-readable labels for the correlation plot.
# Fixes vs. the original dict: the duplicate 'BOGO_Offer_Rec' key (which
# silently clobbered the 'BOGO Offer Received' label), the 'otal'->'Total'
# typo, and the 'Disc_Offer'/'Info_Offer' keys, which did not match the
# actual target columns 'BOGO_offer'/'Disc_offer'/'Info_offer'.
Feat_Dict = {'BOGO_Offer_Rec':'BOGO Offer Received', 'Difficulty_per_Offer':'Difficulty per Offer',
             'Disc_Offer_Rec':'Discount Offer Received', 'Gender_Female':'Gender: Female',
             'Gender_Male':'Gender: Male', 'Gender_Other':'Gender: Other', 'Info_Offer_Rec':'Informational Offer Received',
             'Member_Tenure':'Member Tenure', 'Offer_Comp_Rec_Ratio': 'Offer Completed Receive Ratio',
             'Offer_Comp_View_Ratio':'Viewed Offer Completed Ratio', 'Offer_Tran_Cnt_Ratio':'Offer Transaction Count Ratio' ,
             'Offer_Trans_Amnt':'Offer Transaction Amount', 'Offer_Trans_Amnt_Ratio':'Offer Transaction Amount Ratio',
             'Offer_View': 'Viewed Offer', 'Reward_per_Offer': 'Reward per Offer',
             'Tot_Tran_Amnt':'Total Transaction Amount', 'Tot_Tran_Cnt':'Total Transaction Count',
             # NOTE(review): no 'offer_comp' column is visible upstream — verify this key.
             'Tran_Amnt_per_Offer':'Transactions Amount per Offer','offer_comp': 'Offer Completed',
             'Ave_Tran_Amnt': 'Average Transaction Amount', 'BOGO_offer':'BOGO Offer',
             'Disc_offer':'Discount Offer', 'Info_offer': 'Informational Offer'}
# Relabel the columns for readability (rename returns a copy), then plot.
Temp = User_Data.rename(columns=Feat_Dict)
Correlation_Plot(Temp, 14)
In [7]:
# Separate features from the multi-label targets, then do a 70/30 split.
target_cols = list(Target.keys())
X = User_Data.drop(columns=target_cols)
y = User_Data[target_cols].astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Summarize the shapes of the resulting sets in a one-row table.
pd.DataFrame(data={'Set': ['X_train', 'X_test', 'y_train', 'y_test'],
                   'Shape': [X_train.shape, X_test.shape, y_train.shape, y_test.shape]}).set_index('Set').T
Out[7]:
Set X_train X_test y_train y_test
Shape (11900, 22) (5100, 22) (11900, 3) (5100, 3)

Neural Network using Keras Sequential Model

We can implement a neural network model using Sequential of Keras. Therefore,

In [8]:
# Four-layer fully-connected network with one sigmoid output per offer type,
# so the three labels are predicted independently (multi-label classification).
# Fix: `init=` is the removed Keras 1 keyword; Keras 2 uses `kernel_initializer=`.
model = Sequential()
model.add(Dense(81, input_dim=X.shape[1], kernel_initializer='uniform', activation='sigmoid', name='Layer1'))
model.add(Dense(27, kernel_initializer='uniform', activation='sigmoid', name='Layer2'))
model.add(Dense(3, kernel_initializer='uniform', activation='sigmoid', name='Layer3'))
model.add(Dense(y.shape[1], kernel_initializer='uniform', activation='sigmoid', name='Layer4'))
model.summary()
plot_model(model, show_shapes=True, show_layer_names=True, expand_nested=True)
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
Layer1 (Dense)               (None, 81)                1863      
_________________________________________________________________
Layer2 (Dense)               (None, 27)                2214      
_________________________________________________________________
Layer3 (Dense)               (None, 3)                 84        
_________________________________________________________________
Layer4 (Dense)               (None, 3)                 12        
=================================================================
Total params: 4,173
Trainable params: 4,173
Non-trainable params: 0
_________________________________________________________________
Out[8]:
In [9]:
# Number of training epochs
IT = int(5e2)+1

# Binary cross-entropy: each of the three sigmoid outputs is an independent
# binary label, so this is the appropriate multi-label loss.
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy','mae', 'mse'])

# Train model. Fix: `nb_epoch=` is the deprecated Keras 1 keyword for `epochs=`.
history = model.fit(X_train, y_train, epochs=IT, batch_size=50, verbose=0)
# Predictions and score on the held-out test set
y_pred = model.predict(X_test)
score = model.evaluate(X_test, y_test) 
5100/5100 [==============================] - 0s 15us/step
In [10]:
# Tabulate the final evaluation metrics and the per-epoch training history.
score = pd.DataFrame([score], columns=model.metrics_names)
history = pd.DataFrame(history.history)
display(score.style.hide_index())
loss accuracy mae mse
0.165181 0.954445 0.048162 0.037365
In [11]:
# Training curves: loss, accuracy, MAE and MSE per epoch on one interactive chart.
fig = go.Figure()
curves = [('loss', 'Loss', 'OrangeRed'),
          ('accuracy', 'Accuracy', 'MidnightBlue'),
          ('mae', 'Mean Absolute Error (MAE)', 'ForestGreen'),
          ('mse', 'Mean Squared Error (MSE)', 'purple')]
for col, label, color in curves:
    fig.add_trace(go.Scatter(x=history.index.values, y=history[col].values,
                             line=dict(color=color, width=1.5), name=label))

# Layout: white background, light-gray grid and mirrored border, legend mid-right.
fig.update_layout(legend=dict(y=0.5, traceorder='reversed', font_size=12))
fig.update_layout(dragmode='select', plot_bgcolor='white', height=600, hovermode='closest')
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig['layout']['xaxis'].update(range=[0, history.index.values.max()])
fig['layout']['yaxis'].update(range=[0, 1.0])
fig.show()

Next, we can plot the confusion matrix for our classifier.

In [13]:
# Confusion matrices for the predicted offer types: raw counts (left) and
# row-normalized rates (right).
# NOTE(review): the multi-label targets are collapsed to a single class via
# idxmax (the first column holding the row maximum), so users with several
# active labels are attributed to only one offer type — confirm this is intended.
Labels = list(Target.values())
Labels = [x.replace(' ','\n') for x in Labels]  # wrap tick labels onto two lines
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
# Round the sigmoid outputs to 0/1, then reduce truth and prediction to one label per row.
Confusion_Matrix = confusion_matrix(y_test.idxmax(axis=1),
                                    pd.DataFrame(np.round(y_pred),columns = y_test.columns).astype(int).idxmax(axis=1))

# Left panel: raw counts.
_ = sns.heatmap(Confusion_Matrix.round(2), annot=True, annot_kws={"size": 14}, cmap="Blues", ax = ax[0])
_ = ax[0].set_xlabel('Predicted labels')
_ = ax[0].set_ylabel('True labels'); 
_ = ax[0].set_title('Confusion Matrix');
_ = ax[0].xaxis.set_ticklabels(Labels)
_ = ax[0].yaxis.set_ticklabels(Labels)

# Right panel: each row divided by its total, i.e. per-class recall on the diagonal.
Confusion_Matrix = Confusion_Matrix.astype('float') / Confusion_Matrix.sum(axis=1)[:, np.newaxis]
_ = sns.heatmap(Confusion_Matrix.round(2), annot=True, annot_kws={"size": 14}, cmap="Greens", ax = ax[1],
               linewidths = 0.2, vmin=0, vmax=1, cbar_kws={"shrink": 1})
_ = ax[1].set_xlabel('Predicted labels')
_ = ax[1].set_ylabel('True labels'); 
_ = ax[1].set_title('Normalized Confusion Matrix');
_ = ax[1].xaxis.set_ticklabels(Labels)
_ = ax[1].yaxis.set_ticklabels(Labels)

Predictions

In [14]:
# Compare the exact labels with the model's predicted probabilities for the
# first N test users. Fix: removed the dead `Pred.round(2)` statement whose
# result was discarded (display already rounds below).
N = 20
Test = X_test[:N]
Ind = Test.index

# Build a two-level column header: ('Exact' | 'Predicted') x offer type.
Temp = []
for x in ['Exact', 'Predicted']:
    Temp.extend([x] * len(Target))
header = [np.array(Temp, dtype=str), np.array(list(Target.values()) * 2)]
Pred = pd.concat([y_test[:N].reset_index(drop=True), pd.DataFrame(model.predict(Test))], axis=1)
Pred.columns = header
del header, Temp
# Index the rows by the person hash so each prediction is attributable to a user.
Pred.index = [x[0] for x in Data.loc[Data.index.isin(Ind), ['Person']].values]
display(Pred.round(2))
Exact Predicted
BOGO Offers Discount Offers Informational Offers BOGO Offers Discount Offers Informational Offers
816afd4505d047cf893275fbfe825590 1 1 0 1.00 1.00 0.00
8c410d84af08408fb41f953c93ffac27 0 1 0 0.01 1.00 0.65
22d87f4f3e2b42f49a9af68a350cf3ef 0 1 1 0.84 1.00 1.00
d0cb3963974b4266bf7a8ede1d96a884 0 1 0 0.00 1.00 0.00
c2240708c0b84264ad9060467887346c 0 1 0 0.00 1.00 0.00
1a79f623402a4e53908e254531e04f26 1 1 1 0.99 1.00 0.57
06ab01a0775d4b35bceca2d607271874 1 1 0 1.00 0.99 0.00
467f5f3edc6c407ebb189d57e839bfd3 1 0 0 1.00 0.00 0.00
3b78a727167b432d8e1935227b286def 0 1 1 0.00 1.00 1.00
b5b17db2d0af49fe89abfd7076b605a8 1 0 1 1.00 0.00 1.00
8571868385524806bcacd7e73c1ae5e1 1 1 1 0.99 1.00 1.00
d80ce36710b34270bb543898db2f1882 0 1 0 0.99 0.95 0.00
97b938182cbd48e2b02c859dc8e7838c 0 0 0 0.00 0.00 0.00
03e866224ec641c58bc93a69be00de9c 1 1 0 0.15 1.00 0.00
a0eb0c8c7b0d4a69ab1784e377564379 1 0 0 1.00 0.00 0.00
16943b16c68041a79b0edd556c808fab 0 0 0 0.00 0.00 0.00
12437b2c02be4105aec2d09db79c3c21 0 0 0 0.00 0.00 0.00
5e386312836c43ca85550d8ebbbf12d2 0 0 0 0.00 0.00 0.00
6851449a9192478d86ae6c22e8b93200 0 1 1 1.00 0.93 1.00
60d0b2ba8ea3482f894c634b80801631 1 0 0 1.00 0.00 0.00